var $ = require('cheerio')
var request = require('request')
var db = require('../db/data')
var co = require('co')

/**
 * Record for one scraped article.
 *
 * Every instance starts with empty string fields and `checktime` set to the
 * moment the instance was constructed.
 */
class article {
  constructor(){
    Object.assign(this, {
      title: '',
      checktime: new Date(),
      timer: '',
      author: '',
      content: ''
    })
  }
}

/**
 * Pauses for `time` milliseconds.
 *
 * @param {number} time - delay handed to setTimeout.
 * @returns {Promise<undefined>} fulfilled (with no value) once the timer fires.
 */
function sleep(time){
  return new Promise(function (done) {
    setTimeout(done, time)
  })
}

/**
 * Fetches the HTML text behind a link.
 *
 * @param {string} link - passed straight to `request` as its first argument.
 * @returns {Promise<string>} fulfilled with the body that `request` hands to
 *   its callback; rejected with the error `request` reports.
 */
function gethtml(link){
  return new Promise(function(resolve, reject) {
    request(link, function(error, response, html){
      if(error){
        reject(error)
        // Stop here so the failure path never falls through to resolve().
        return
      }
      resolve(html)
    })
  })
}

/**
 * Collects the next level of links found on a page.
 *
 * Downloads `link`, selects every element matching `node` and returns the
 * `href` attribute of each one, in document order. Elements without an
 * `href` contribute `undefined`.
 *
 * @param {string} link - page to download.
 * @param {string} node - selector for the anchor elements.
 * @returns {Promise<Array>} fulfilled with the list of href values.
 */
function getlink(link,node){
  return co(function* () {
    const page = yield gethtml(link)
    const anchors = $(node, page)
    return Array.from(anchors, function (anchor) {
      return anchor.attribs.href
    })
  })
}

/**
 * Looks up stored articles that have the same title as `act`.
 *
 * @param {article} act - article whose `title` is matched exactly.
 * @param {string} [category='weixin'] - collection to search. Defaults to
 *   'weixin' so existing one-argument callers keep their behavior.
 * @returns {Promise<Array>} fulfilled with the matching documents; rejected
 *   with the error reported by the database callback.
 */
function finddb(act, category = 'weixin'){
  return new Promise(function(resolve, reject) {
    db.collection(category).find({title: act.title}).toArray(function (err, items) {
      if(err){
        reject(err)
        return
      }
      resolve(items)
    })
  })
}

// Saves `act` into the `category` collection; rejects with the database error.
function storedb(category, act){
  return new Promise(function(resolve, reject) {
    db.collection(category).save(act, function(err, data){
      if(err){
        reject(err)
        return
      }
      resolve(data)
    })
  })
}

/**
 * Downloads every link, extracts title / time / author with the selectors in
 * `nodeArgs`, and stores each article whose title is non-empty and not yet
 * present in the `category` collection.
 *
 * Links are processed one at a time with a pause after each. A failure on one
 * link (download, lookup or save) is logged and the loop moves on to the next.
 *
 * @param {string} category - collection used for both the duplicate check and
 *   the save.
 * @param {{title: string, timer: string, author: string}} nodeArgs - selectors
 *   for the corresponding article fields.
 * @param {Array<string>} links - article URLs to process, in order.
 * @param {number} [interval=5000] - milliseconds to wait after each link.
 * @returns {Promise<undefined>} fulfilled once every link has been handled.
 */
function savedb(category, nodeArgs, links, interval = 5000){
  return co(function* () {
    for(let i=0;i<links.length;i++){
      try{
        const html = yield gethtml(links[i])
        const act = new article()
        // Strip carriage returns, newlines, spaces and single quotes from the title.
        act.title = $(nodeArgs.title, html).text().replace(/[\r \n '']/g, "")
        act.timer = $(nodeArgs.timer, html).text() || false
        act.author = $(nodeArgs.author, html).text() || false
        act.content = html
        // Check for duplicates in the same collection the article is saved to.
        const items = yield finddb(act, category)
        if(!items.length && act.title){
          yield storedb(category, act)
        }
      }catch(err){
        // Best effort: one bad link must not abort the rest of the batch.
        console.log(err)
      }
      yield sleep(interval)
    }
  })
}

/**
 * Runs one scrape pass: collects the article links found on `link` and hands
 * them to savedb for storage under `category`.
 *
 * @param {string} category - collection name forwarded to savedb.
 * @param {Object} zhartnode - field selectors forwarded to savedb.
 * @param {string} link - index page whose links are collected.
 * @param {string} node - selector forwarded to getlink.
 * @returns {Promise<undefined>} settles when the pass is finished. Failures
 *   are logged rather than propagated, so the promise never rejects.
 */
function run(category, zhartnode, link, node){
  return co(function* () {
    const links = yield getlink(link,node)
    // Wait for the save pass so its failures reach the handler below.
    yield savedb(category,zhartnode, links)
  }).catch(function (err) {
    // Callers invoke run() without handling its result; log here so a failed
    // pass does not surface as an unhandled promise rejection.
    console.log(err)
  })
}

// Selectors for the title, time and author fields of an article page,
// as passed to run() for the 'weixin' category.
const wxartnode = {
  title: 'h2.rich_media_title',
  timer: 'em#post-date',
  author: 'a#post-user'
}

// Selectors for the same three fields, as passed to run() for the 'zhihu' category.
const zhartnode = {
  title: 'h1',
  timer: 'time',
  author: 'a.PostIndex-authorName'
}

exports.zhihu_weixin = setInterval(function(){
  try{
    console.log("  \_ Grab WeChat zhihu data")
    run('zhihu', zhartnode, 'http://zhihu.sogou.com/', 'ul.news-list p.tit a')
    run('weixin', wxartnode, 'http://weixin.sogou.com/', 'h3 a')
    console.log("  \_ this is end, wait...")
  }catch(err){
    console.log(err)
  }
}, 5*60*1000)
